library(zoo)
library(lubridate)
library(mgcv)
library(TSA)
library(dynlm)
library(tseries)
library(vars)
library(rugarch)
library(forecast)
library(wesanderson)
library(knitr)
# Read in data, split into full (10-minute) and abridged (daily / hourly) data
full.data <- read.csv("jena_climate_2009_2016.csv")
full.data$Date.Time <- as.POSIXct(full.data$Date.Time, format = "%d.%m.%Y %H:%M:%S")
full.data$Date <- as.Date(full.data$Date.Time)
full.data$Hour <- hour(full.data$Date.Time)
# BUG FIXES vs. original:
#  - `pressure <- full.data$p..mbar.` inside data.frame() assigned a stray
#    global and produced a mangled auto-generated column name; use `=`.
#  - `hour` was set to the full timestamp (Date.Time) instead of the extracted
#    hour of day, which made the hourly aggregation group by timestamp.
df <- data.frame(datetime = full.data$Date.Time,
                 date = full.data$Date,
                 hour = full.data$Hour,
                 pressure = full.data$p..mbar.,
                 temperature = full.data$Tpot..K.,
                 humidity = full.data$sh..g.kg.)
# Daily means of pressure/temperature/humidity (drop datetime/date/hour)
data.daily <- aggregate(df[-c(1, 2, 3)], list(df$date), mean)
colnames(data.daily) <- c("date", "pressure", "temperature", "humidity")
data.daily$month <- month(data.daily$date)
# Hourly means, grouped by calendar date and hour of day
data.hourly <- aggregate(df[-c(1, 2, 3)], list(df$date, df$hour), mean)
colnames(data.hourly) <- c("date", "hour", "pressure", "temperature", "humidity")
data.hourly$month <- month(data.hourly$date)
# Train on the first 2879 days, hold out the last 42 days (6 weeks) for testing
daily.training.data <- data.daily[1:2879, ]
daily.training.date <- data.daily[1:2879, 1]
daily.testing.data <- data.daily[2880:2921, ]
daily.testing.date <- data.daily[2880:2921, 1]
daily.training.month <- data.daily[1:2879, 5]
daily.testing.month <- data.daily[2880:2921, 5]
# Day-of-year of the first test date: anchors the test ts objects within 2016
test.yday <- yday(daily.testing.date[1])
# Daily series as ts objects with annual frequency (365 obs/year):
# full series, then train/test splits.
daily.pressure.ts <- ts(data.daily$pressure, frequency = 365, start = c(2009, 1))
daily.temperature.ts <- ts(data.daily$temperature, frequency = 365, start = c(2009, 1))
daily.humidity.ts <- ts(data.daily$humidity, frequency = 365, start = c(2009, 1))
daily.pressure.train.ts <- ts(daily.training.data$pressure, frequency = 365, start = c(2009, 1))
daily.temp.train.ts <- ts(daily.training.data$temperature, frequency = 365, start = c(2009, 1))
daily.humidity.train.ts <- ts(daily.training.data$humidity, frequency = 365, start = c(2009, 1))
daily.pressure.test.ts <- ts(daily.testing.data$pressure, frequency = 365, start = c(2016, test.yday))
daily.temp.test.ts <- ts(daily.testing.data$temperature, frequency = 365, start = c(2016, test.yday))
daily.humidity.test.ts <- ts(daily.testing.data$humidity, frequency = 365, start = c(2016, test.yday))
# Training-series plots (paste() around a single literal was redundant)
ts.plot(daily.pressure.train.ts, type = "l", main = "pressure", ylab = "")
ts.plot(daily.temp.train.ts, type = "l", main = "temperature", ylab = "")
ts.plot(daily.humidity.train.ts, type = "l", main = "humidity", ylab = "")
# (Cross-)correlograms of the three training series (drop date/month columns)
stats::acf(as.matrix(daily.training.data[-c(1, 5)]))
# Time index rescaled to [0, 1] for the GAM smooth terms
daily.time.pts <- seq_along(daily.training.date)
daily.time.pts <- (daily.time.pts - min(daily.time.pts)) / max(daily.time.pts)
# GAM for pressure: non-parametric trend (spline over the scaled time index)
# plus monthly seasonality as a factor (ANOVA-style) -- mgcv::gam.
gam1.fitted = gam(daily.pressure.train.ts ~ s(daily.time.pts)+factor(daily.training.month))
summary(gam1.fitted)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## daily.pressure.train.ts ~ s(daily.time.pts) + factor(daily.training.month)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 988.29162 0.50345 1963.024 < 2e-16 ***
## factor(daily.training.month)2 -0.72118 0.72098 -1.000 0.317262
## factor(daily.training.month)3 2.35401 0.70459 3.341 0.000846 ***
## factor(daily.training.month)4 0.07709 0.71134 0.108 0.913712
## factor(daily.training.month)5 0.32222 0.70677 0.456 0.648492
## factor(daily.training.month)6 1.02128 0.71413 1.430 0.152798
## factor(daily.training.month)7 0.53038 0.71017 0.747 0.455225
## factor(daily.training.month)8 1.23343 0.71230 1.732 0.083448 .
## factor(daily.training.month)9 2.52162 0.72038 3.500 0.000472 ***
## factor(daily.training.month)10 2.44909 0.71786 3.412 0.000655 ***
## factor(daily.training.month)11 -1.12845 0.72876 -1.548 0.121624
## factor(daily.training.month)12 0.33730 0.73621 0.458 0.646873
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(daily.time.pts) 8.653 8.965 8.836 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0419 Deviance explained = 4.85%
## GCV = 61.873 Scale est. = 61.429 n = 2879
# Same GAM specification (spline trend + month factor) for temperature.
gam2.fitted = gam(daily.temp.train.ts ~ s(daily.time.pts)+factor(daily.training.month))
summary(gam2.fitted)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## daily.temp.train.ts ~ s(daily.time.pts) + factor(daily.training.month)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 274.0294 0.2670 1026.228 < 2e-16 ***
## factor(daily.training.month)2 0.7065 0.3824 1.848 0.0647 .
## factor(daily.training.month)3 4.4256 0.3737 11.843 < 2e-16 ***
## factor(daily.training.month)4 9.8243 0.3773 26.041 < 2e-16 ***
## factor(daily.training.month)5 13.2875 0.3748 35.448 < 2e-16 ***
## factor(daily.training.month)6 16.4009 0.3788 43.302 < 2e-16 ***
## factor(daily.training.month)7 19.3199 0.3767 51.293 < 2e-16 ***
## factor(daily.training.month)8 18.4052 0.3778 48.717 < 2e-16 ***
## factor(daily.training.month)9 14.3539 0.3821 37.566 < 2e-16 ***
## factor(daily.training.month)10 9.0388 0.3808 23.738 < 2e-16 ***
## factor(daily.training.month)11 5.6022 0.3865 14.493 < 2e-16 ***
## factor(daily.training.month)12 1.8139 0.3905 4.645 3.55e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(daily.time.pts) 8.7 8.974 15.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.722 Deviance explained = 72.4%
## GCV = 17.403 Scale est. = 17.277 n = 2879
# Same GAM specification (spline trend + month factor) for humidity.
gam3.fitted = gam(daily.humidity.train.ts ~ s(daily.time.pts)+factor(daily.training.month))
summary(gam3.fitted)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## daily.humidity.train.ts ~ s(daily.time.pts) + factor(daily.training.month)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.39232 0.09080 37.361 < 2e-16 ***
## factor(daily.training.month)2 -0.04639 0.13004 -0.357 0.721321
## factor(daily.training.month)3 0.60623 0.12709 4.770 1.93e-06 ***
## factor(daily.training.month)4 1.73257 0.12830 13.504 < 2e-16 ***
## factor(daily.training.month)5 3.19271 0.12748 25.046 < 2e-16 ***
## factor(daily.training.month)6 4.82367 0.12880 37.450 < 2e-16 ***
## factor(daily.training.month)7 6.07194 0.12808 47.406 < 2e-16 ***
## factor(daily.training.month)8 5.76366 0.12846 44.866 < 2e-16 ***
## factor(daily.training.month)9 4.53389 0.12992 34.898 < 2e-16 ***
## factor(daily.training.month)10 2.72323 0.12946 21.035 < 2e-16 ***
## factor(daily.training.month)11 1.53642 0.13143 11.690 < 2e-16 ***
## factor(daily.training.month)12 0.48842 0.13278 3.678 0.000239 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(daily.time.pts) 8.602 8.954 12.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.701 Deviance explained = 70.3%
## GCV = 2.0129 Scale est. = 1.9985 n = 2879
Response: Based on the model outputs above, we conclude that both the monthly seasonality and the trend are statistically significant in all 3 models.
# Overlay each fitted GAM (trend + monthly seasonality) on its training series.
# Note: the original used `freq =`, relying on partial argument matching;
# spell out `frequency =` explicitly.
gam1.fitted.ts <- ts(fitted(gam1.fitted), frequency = 365, start = c(2009, 1))
plot(daily.pressure.train.ts, ylab = "Pressure", col = "black")
lines(gam1.fitted.ts, lwd = 1.5, col = "purple")
legend(x = "topleft", legend = c("Pressure", "Non-parametric trend and monthly seasonality using ANOVA"), fill = c("black", "purple"), cex = 0.7, lty = 1)
gam2.fitted.ts <- ts(fitted(gam2.fitted), frequency = 365, start = c(2009, 1))
plot(daily.temp.train.ts, ylab = "Temperature", col = "black")
lines(gam2.fitted.ts, lwd = 1.5, col = "purple")
legend(x = "topleft", legend = c("Temperature", "Non-parametric trend and monthly seasonality using ANOVA"), fill = c("black", "purple"), cex = 0.7, lty = 1)
gam3.fitted.ts <- ts(fitted(gam3.fitted), frequency = 365, start = c(2009, 1))
plot(daily.humidity.train.ts, ylab = "Humidity", col = "black")
lines(gam3.fitted.ts, lwd = 1.5, col = "purple")
legend(x = "topleft", legend = c("Humidity", "Non-parametric trend and monthly seasonality using ANOVA"), fill = c("black", "purple"), cex = 0.7, lty = 1)
Response: Looking at the plots above, we can see that for all 3 time series we were able to capture the main pattern; this is especially visible for the temperature and humidity series, which have clear seasonality patterns.
# Residual diagnostics for the pressure GAM: residual series, ACF,
# squared-residual ACF (heteroscedasticity check), and PACF in a 2x2 panel.
par(mfrow=c(2,2))
ts.plot(ts(gam1.fitted$residuals, frequency = 365, start = c(2009,1)), main="Pressure Residuals")
acf(gam1.fitted$residuals, lag.max = 40, main="ACF of Pressure Residuals")
acf(gam1.fitted$residuals^2, lag.max = 40, main="ACF of Squared Pressure Residuals")
pacf(gam1.fitted$residuals, lag.max = 40, main="PACF of Pressure Residuals")
# Stationarity tests (tseries): ADF (null: unit root) at two lag orders,
# then KPSS (null: level-stationary).
adf.test(gam1.fitted$residuals, k=1)
##
## Augmented Dickey-Fuller Test
##
## data: gam1.fitted$residuals
## Dickey-Fuller = -23.051, Lag order = 1, p-value = 0.01
## alternative hypothesis: stationary
adf.test(gam1.fitted$residuals, k=5)
##
## Augmented Dickey-Fuller Test
##
## data: gam1.fitted$residuals
## Dickey-Fuller = -16.966, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(gam1.fitted$residuals)
##
## KPSS Test for Level Stationarity
##
## data: gam1.fitted$residuals
## KPSS Level = 0.018621, Truncation lag parameter = 9, p-value = 0.1
Response: Looking at the time series plot, the variance seems to be somewhat constant; however, the ACF of the squared residuals shows some correlation. On the ACF plot we see that most of the values after lag 7 are within the confidence band, which indicates stationarity. Results of the Augmented Dickey-Fuller test and the KPSS test also suggest stationary data. To conclude, based on the time series plot, the ACF plot, and the stationarity tests, the residuals of the first model are stationary and heteroscedasticity is present.
# Residual diagnostics for the temperature GAM (same panel layout as above).
par(mfrow=c(2,2))
ts.plot(ts(gam2.fitted$residuals, frequency = 365, start = c(2009,1)), main="Temperature Residuals")
acf(gam2.fitted$residuals, lag.max = 40, main="ACF of Temperature Residuals")
acf(gam2.fitted$residuals^2, lag.max = 40, main="ACF of Squared Temperature Residuals")
pacf(gam2.fitted$residuals, lag.max = 40, main="PACF of Temperature Residuals")
# Stationarity tests on the temperature-model residuals.
adf.test(gam2.fitted$residuals, k=1)
##
## Augmented Dickey-Fuller Test
##
## data: gam2.fitted$residuals
## Dickey-Fuller = -20.712, Lag order = 1, p-value = 0.01
## alternative hypothesis: stationary
adf.test(gam2.fitted$residuals, k=5)
##
## Augmented Dickey-Fuller Test
##
## data: gam2.fitted$residuals
## Dickey-Fuller = -15.875, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(gam2.fitted$residuals)
##
## KPSS Test for Level Stationarity
##
## data: gam2.fitted$residuals
## KPSS Level = 0.013906, Truncation lag parameter = 9, p-value = 0.1
Response: Looking at the time series plot, the variance seems to be constant, and the squared residuals show the same result. On the ACF plot we see that most of the values after lag 8 are within the confidence band, which indicates stationarity; however, there might be a seasonal pattern left in the residuals. Results of the Augmented Dickey-Fuller test and the KPSS test suggest stationary data. Based on the time series plot, the ACF plot, and the stationarity tests, we conclude that the residuals of the second model seem to be stationary and the variance seems to be constant.
# Residual diagnostics for the humidity GAM (same panel layout as above).
par(mfrow=c(2,2))
ts.plot(ts(gam3.fitted$residuals, frequency = 365, start = c(2009,1)), main="Humidity Residuals")
acf(gam3.fitted$residuals, lag.max = 40, main="ACF of Humidity Residuals")
acf(gam3.fitted$residuals^2, lag.max = 40, main="ACF of Squared Humidity Residuals")
pacf(gam3.fitted$residuals, lag.max = 40, main="PACF of Humidity Residuals")
# Stationarity tests on the humidity-model residuals.
adf.test(gam3.fitted$residuals, k=1)
##
## Augmented Dickey-Fuller Test
##
## data: gam3.fitted$residuals
## Dickey-Fuller = -24.121, Lag order = 1, p-value = 0.01
## alternative hypothesis: stationary
adf.test(gam3.fitted$residuals, k=5)
##
## Augmented Dickey-Fuller Test
##
## data: gam3.fitted$residuals
## Dickey-Fuller = -17.022, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(gam3.fitted$residuals)
##
## KPSS Test for Level Stationarity
##
## data: gam3.fitted$residuals
## KPSS Level = 0.010698, Truncation lag parameter = 9, p-value = 0.1
Response: Looking at the time series plot and the ACF plot of the squared residuals, we conclude that the variance isn't constant. On the ACF plot we see a seasonal pattern, which indicates non-stationarity. However, the Augmented Dickey-Fuller and KPSS tests both suggest stationary data, so there is not enough evidence of non-stationarity. To conclude, based on the time series plot, the ACF plot, and the stationarity tests, the residuals of the third model are close to stationary, but we cannot say for sure; there is also heteroscedasticity in the residuals.
# Rolling one-week-ahead GAM forecasts over the 6-week test window:
# at each step, refit on all data observed so far and predict the next 7 days.
# Pre-allocate the prediction vectors instead of growing them with c() in the
# loop, and call the generic predict() rather than the method predict.gam().
n.weeks <- 6
prediction.p <- numeric(n.weeks * 7)
prediction.t <- numeric(n.weeks * 7)
prediction.h <- numeric(n.weeks * 7)
# Time index scaled to [0, 1] over the FULL daily range so that training and
# test rows share one scale (loop-invariant, hoisted out of the loop).
tps <- seq_len(nrow(data.daily))
tps <- (tps - min(tps)) / max(tps)
for (i in seq_len(n.weeks)) {
  idx <- (1 + (i - 1) * 7):(i * 7)  # test rows forecast at step i
  data <- daily.training.data
  if (i >= 2) {
    # Expanding window: already-forecast test weeks rejoin the training set
    data <- rbind(data, daily.testing.data[1:((i - 1) * 7), ])
  }
  new.data <- daily.testing.data[idx, ]
  data$tps <- tps[seq_len(nrow(data))]
  new.data$tps <- tps[(nrow(data) + 1):(nrow(data) + 7)]
  gam.p <- gam(pressure ~ s(tps) + factor(month), data = data)
  prediction.p[idx] <- predict(gam.p, new.data)
  gam.t <- gam(temperature ~ s(tps) + factor(month), data = data)
  prediction.t[idx] <- predict(gam.t, new.data)
  gam.h <- gam(humidity ~ s(tps) + factor(month), data = data)
  prediction.h[idx] <- predict(gam.h, new.data)
}
gam.prediction <- data.frame('date' = daily.testing.data$date,
                             'pressure' = prediction.p,
                             'temperature' = prediction.t,
                             'humidity' = prediction.h,
                             'month' = daily.testing.data$month)
# Observed series (black line) vs. GAM predictions (red points)
plot(daily.testing.data$date, daily.testing.data$pressure, type = 'l', ylim = c(980,1020), main = 'Pressure Prediction', ylab = 'Pressure', xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.p, pch = 1, col = 'red', ylim = c(980,1020), ylab = '', cex = 0.6, xlab = 'Date')
plot(daily.testing.data$date, daily.testing.data$temperature, ylim = c(268,285), main = 'Temperature Prediction', ylab = 'Temperature', type = 'l', xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.t, col = 'red', ylim = c(268,285),ylab = '', pch = 1, cex = 0.8, xlab = 'Date')
plot(daily.testing.data$date, daily.testing.data$humidity, ylim = c(2,6), main = 'Humidity Prediction',ylab = 'Humidity', type = 'l', xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.h, col = 'red', ylim = c(2,6),ylab = '', pch = 1, cex = 0.8, xlab = 'Date')
# Number of held-out observations to forecast (42 days = 6 weeks)
nfit <- length(daily.testing.date)
# Annual harmonics of the month index (sin/cos, period 12), used as external
# regressors (xreg) for the ARIMA models below.
harmonic.train <- cbind(sin(daily.training.month*2*pi/12),cos(daily.training.month*2*pi/12))
harmonic.test <- cbind(sin(daily.testing.month*2*pi/12),cos(daily.testing.month*2*pi/12))
harmonic.all <- rbind(harmonic.train, harmonic.test)
## Fit an ARMA model to the residuals
# Helper for the (commented-out) AIC grid search below: fits ARIMA(p,d,q)
# with harmonic regressors to `train.data` and returns a one-row data frame
# of the order and its AIC.
# NOTE: reads `harmonic.train` from the global environment, so it is only
# valid for training-length series.
test_modelA <- function(p, d, q, train.data) {
  mod <- Arima(train.data, order = c(p, d, q), xreg = as.matrix(harmonic.train))
  current.aic <- AIC(mod)
  df <- data.frame(p, d, q, current.aic)
  names(df) <- c("p", "d", "q", "AIC")
  df
}
#orders = data.frame(Inf,Inf,Inf, Inf)
#names(orders) <- c("p","d","q","AIC")
#Orders for Arima for Pressure
#for (p in 0:9){
# for (q in 0:9) {
# for (d in 0:1) {
# possibleError <- tryCatch(
# orders<-rbind(orders,test_modelA(p,d,q,daily.pressure.train.ts)),
# error=function(e) e
# )
# if(inherits(possibleError, "error")) next
# }
# }
#}
#orders <- orders[order(-orders$AIC),]
#tail(orders,5)
# ARIMA(4,1,6) with annual harmonic regressors for daily pressure
# (order from the AIC grid search commented out above).
arma.pressure <- Arima(daily.pressure.train.ts,order=c(4,1,6),xreg=as.matrix(harmonic.train))
arma.pressure
## Series: daily.pressure.train.ts
## Regression with ARIMA(4,1,6) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.2212 -0.3910 0.0810 0.3321 -0.1713 0.0267 -0.2155 -0.5524
## s.e. 0.2714 0.2003 0.2358 0.1684 0.2713 0.1822 0.0969 0.1548
## ma5 ma6 xreg1 xreg2
## -0.1592 0.0746 -0.7215 -0.5604
## s.e. 0.0968 0.0282 0.5014 0.5009
##
## sigma^2 = 21.21: log likelihood = -8475.3
## AIC=16976.6 AICc=16976.72 BIC=17054.14
## compute the test statistics
# z-statistics: coefficient estimate divided by its standard error
testval <-
as.numeric(arma.pressure$coef) / as.numeric(sqrt(diag(arma.pressure$var.coef)))
## print t-statistic
print(testval)
## [1] 0.8152592 -1.9522947 0.3435231 1.9718864 -0.6316090 0.1468291
## [7] -2.2234923 -3.5676879 -1.6447554 2.6449889 -1.4390824 -1.1187279
## print p-values
# Two-sided normal p-values for the z-statistics above
print(2*(1-pnorm(abs(testval))))
## [1] 0.4149239385 0.0509032318 0.7312049414 0.0486225735 0.5276423839
## [6] 0.8832668805 0.0261826206 0.0003601451 0.1000202680 0.0081693601
## [11] 0.1501271656 0.2632562195
## print significant coefficients for significance level 0.05
which(abs(testval)>qnorm((1-0.05/2)))
## [1] 4 7 8 10
# Residual diagnostics: series, ACF, squared-residual ACF, normal Q-Q plot
par(mfrow=c(2,2))
ts.plot(ts(residuals(arma.pressure), frequency = 365, start = c(2009,1)))
acf(residuals(arma.pressure), lag.max = 40)
acf(residuals(arma.pressure)^2, lag.max = 40)
qqnorm(residuals(arma.pressure))
qqline(residuals(arma.pressure))
#perform shapiro-wilk test (null hypothesis: residuals are normal)
shapiro.test(residuals(arma.pressure))
##
## Shapiro-Wilk normality test
##
## data: residuals(arma.pressure)
## W = 0.99092, p-value = 1.518e-12
#orders = data.frame(Inf,Inf,Inf, Inf)
#names(orders) <- c("p","d","q","AIC")
#Orders for Arima for Temperature
#for (p in 0:9){
# for (q in 0:9) {
# for (d in 0:1) {
# possibleError <- tryCatch(
# orders<-rbind(orders,test_modelA(p,d,q,daily.temp.train.ts)),
# error=function(e) e
# )
# if(inherits(possibleError, "error")) next
# }
# }
#}
#orders <- orders[order(-orders$AIC),]
#tail(orders,5)
# ARIMA(8,1,4) with annual harmonic regressors for daily temperature.
arma.temp <- Arima(daily.temp.train.ts,order=c(8,1,4),xreg=as.matrix(harmonic.train))
arma.temp
## Series: daily.temp.train.ts
## Regression with ARIMA(8,1,4) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.1733 1.2400 0.1580 -0.7332 0.2136 -0.0118 -0.0170 -0.0406
## s.e. 0.0240 0.0249 0.0309 0.0322 0.0304 0.0301 0.0203 0.0200
## ma1 ma2 ma3 ma4 xreg1 xreg2
## -0.1918 -1.4737 -0.2645 0.9386 -0.9585 -0.9762
## s.e. 0.0154 0.0159 0.0117 0.0169 0.6957 0.7700
##
## sigma^2 = 6.214: log likelihood = -6706
## AIC=13442 AICc=13442.16 BIC=13531.47
## compute the test statistics
# z-statistics: coefficient estimate divided by its standard error
testval <-
as.numeric(arma.temp$coef) / as.numeric(sqrt(diag(arma.temp$var.coef)))
## print t-statistic
print(testval)
## [1] 7.2344270 49.7931149 5.1078425 -22.7548978 7.0342553 -0.3934796
## [7] -0.8384977 -2.0271002 -12.4184156 -92.5717346 -22.6612535 55.5215764
## [13] -1.3776419 -1.2679162
## print p-values
print(2*(1-pnorm(abs(testval))))
## [1] 4.674039e-13 0.000000e+00 3.258580e-07 0.000000e+00 2.003286e-12
## [6] 6.939653e-01 4.017512e-01 4.265216e-02 0.000000e+00 0.000000e+00
## [11] 0.000000e+00 0.000000e+00 1.683139e-01 2.048279e-01
## print significant coefficients for significance level 0.05
which(abs(testval)>qnorm((1-0.05/2)))
## [1] 1 2 3 4 5 8 9 10 11 12
# Residual diagnostics: series, ACF, squared-residual ACF, normal Q-Q plot
par(mfrow=c(2,2))
ts.plot(ts(residuals(arma.temp), frequency = 365, start = c(2009,1)))
acf(residuals(arma.temp), lag.max = 40)
acf(residuals(arma.temp)^2, lag.max = 40)
qqnorm(residuals(arma.temp))
qqline(residuals(arma.temp))
#perform shapiro-wilk test (null hypothesis: residuals are normal)
shapiro.test(residuals(arma.temp))
##
## Shapiro-Wilk normality test
##
## data: residuals(arma.temp)
## W = 0.99711, p-value = 2.872e-05
#Orders for Arima for Humidity
#orders = data.frame(Inf,Inf,Inf, Inf)
#names(orders) <- c("p","d","q","AIC")
#for (p in 0:9){
# for (q in 0:9) {
# for (d in 0:1) {
# possibleError <- tryCatch(
# orders<-rbind(orders,test_modelA(p,d,q,daily.humidity.train.ts)),
# error=function(e) e
# )
# if(inherits(possibleError, "error")) next
# }
# }
#}
#orders <- orders[order(-orders$AIC),]
#tail(orders,5)
# ARIMA(5,0,1) with annual harmonic regressors for daily humidity
# (no differencing, so an intercept is estimated).
arma.humidity <- Arima(daily.humidity.train.ts,order=c(5,0,1),xreg=as.matrix(harmonic.train))
arma.humidity
## Series: daily.humidity.train.ts
## Regression with ARIMA(5,0,1) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 intercept xreg1
## 1.8894 -1.1621 0.3121 -0.0359 -0.0081 -0.9365 5.9825 -1.2126
## s.e. 0.0233 0.0418 0.0452 0.0402 0.0198 0.0175 0.2317 0.2901
## xreg2
## -1.3177
## s.e. 0.3220
##
## sigma^2 = 0.8555: log likelihood = -3856.68
## AIC=7733.36 AICc=7733.44 BIC=7793.01
## compute the test statistics
# z-statistics: coefficient estimate divided by its standard error
testval <-
as.numeric(arma.humidity$coef) / as.numeric(sqrt(diag(arma.humidity$var.coef)))
## print t-statistic
print(testval)
## [1] 81.1055562 -27.8004696 6.9094195 -0.8926125 -0.4083381 -53.3762094
## [7] 25.8197147 -4.1806021 -4.0918830
## print p-values
print(2*(1-pnorm(abs(testval))))
## [1] 0.000000e+00 0.000000e+00 4.866330e-12 3.720647e-01 6.830255e-01
## [6] 0.000000e+00 0.000000e+00 2.907382e-05 4.278845e-05
## print significant coefficients for significance level 0.05
which(abs(testval)>qnorm((1-0.05/2)))
## [1] 1 2 3 6 7 8 9
# Residual diagnostics: series, ACF, squared-residual ACF, normal Q-Q plot
par(mfrow=c(2,2))
ts.plot(ts(residuals(arma.humidity), frequency = 365, start = c(2009,1)))
acf(residuals(arma.humidity), lag.max = 40)
acf(residuals(arma.humidity)^2, lag.max = 40)
qqnorm(residuals(arma.humidity))
qqline(residuals(arma.humidity))
#perform shapiro-wilk test (null hypothesis: residuals are normal)
shapiro.test(residuals(arma.humidity))
##
## Shapiro-Wilk normality test
##
## data: residuals(arma.humidity)
## W = 0.98891, p-value = 3.419e-14
For all 3 models we see similar results: 1) the ACF plot of the residuals indicates that the residuals aren't correlated; 2) the ACF plot of the squared residuals indicates that the squared residuals are correlated, so heteroscedasticity is present in the residuals; 3) the Shapiro-Wilk test rejects the null hypothesis of normality, so the residuals are not normally distributed, and the QQ plot likewise shows heavier tails than a normal distribution. We conclude that the residuals do not follow a normal distribution, which is required for model fitting using MLE, and therefore that our models are not a good fit.
# Rolling one-week-ahead ARMAX forecasts over the 6-week test window, with
# 95% normal prediction intervals (pred +/- 1.96 * se).
# Collect per-week results in pre-allocated lists and bind once at the end,
# instead of seeding each data frame with an Inf sentinel row and growing it
# with rbind() inside the loop (and then dropping the dummy row).
pred.list.p <- vector("list", 6)
pred.list.t <- vector("list", 6)
pred.list.h <- vector("list", 6)
for (i in 1:6) {
  data <- daily.training.data
  new.xreg <- harmonic.test[(1 + (i - 1)*7): (i*7),]
  if (i >= 2) {
    # Expanding window: already-forecast test weeks rejoin the training set
    data <- rbind(data, daily.testing.data[c(1:((i - 1)*7)),])
  }
  xreg.now <- as.matrix(harmonic.all[1:nrow(data),])
  # Pressure: ARIMA(4,1,6) with harmonic regressors
  arma.p <- Arima(data$pressure, order = c(4,1,6), xreg = xreg.now)
  arma.pred.p <- predict(arma.p, n.ahead = 7, newxreg = as.matrix(new.xreg))
  pred.list.p[[i]] <- data.frame(
    'prediction' = arma.pred.p$pred,
    'lower' = arma.pred.p$pred - 1.96 * arma.pred.p$se,
    'upper' = arma.pred.p$pred + 1.96 * arma.pred.p$se)
  # Temperature: ARIMA(8,1,4) with harmonic regressors
  arma.t <- Arima(data$temperature, order = c(8,1,4), xreg = xreg.now)
  arma.pred.t <- predict(arma.t, n.ahead = 7, newxreg = as.matrix(new.xreg))
  pred.list.t[[i]] <- data.frame(
    'prediction' = arma.pred.t$pred,
    'lower' = arma.pred.t$pred - 1.96 * arma.pred.t$se,
    'upper' = arma.pred.t$pred + 1.96 * arma.pred.t$se)
  # Humidity: ARIMA(5,0,1) with harmonic regressors
  arma.h <- Arima(data$humidity, order = c(5,0,1), xreg = xreg.now)
  arma.pred.h <- predict(arma.h, n.ahead = 7, newxreg = as.matrix(new.xreg))
  pred.list.h[[i]] <- data.frame(
    'prediction' = arma.pred.h$pred,
    'lower' = arma.pred.h$pred - 1.96 * arma.pred.h$se,
    'upper' = arma.pred.h$pred + 1.96 * arma.pred.h$se)
}
prediction.p <- do.call(rbind, pred.list.p)
prediction.p$date <- daily.testing.data$date
prediction.t <- do.call(rbind, pred.list.t)
prediction.t$date <- daily.testing.data$date
prediction.h <- do.call(rbind, pred.list.h)
prediction.h$date <- daily.testing.data$date
# Persist the predictions, then reload them (kept from the original workflow)
armax.prediction <- list(prediction.p, prediction.t, prediction.h)
save(armax.prediction, file = 'armax-prediction.RData')
load('armax-prediction.RData')
prediction.p <- armax.prediction[[1]]
prediction.t <- armax.prediction[[2]]
prediction.h <- armax.prediction[[3]]
# Overlay plots: observed series (black), forecast (red), 95% PI bounds (blue)
plot(daily.testing.data$date, daily.testing.data$pressure, type = 'l', ylim = c(970,1020), main = 'Pressure Prediction', ylab = 'Pressure', xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.p$prediction, type = 'l', col = 'red', ylim = c(970,1020), ylab = '', cex = 0.6, xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.p$lower, type = 'l', col = 'blue', ylim = c(970,1020), ylab = '', cex = 0.6, xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.p$upper, type = 'l', col = 'blue', ylim = c(970,1020), ylab = '', cex = 0.6, xlab = 'Date')
plot(daily.testing.data$date, daily.testing.data$temperature, type = 'l', ylim = c(264,289), main = 'Temperature Prediction', ylab = 'Temperature', xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.t$prediction, type = 'l', col = 'red', ylim = c(264,289), ylab = '', cex = 0.6, xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.t$lower, type = 'l', col = 'blue', ylim = c(264,289), ylab = '', cex = 0.6, xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.t$upper, type = 'l', col = 'blue', ylim = c(264,289), ylab = '', cex = 0.6, xlab = 'Date')
plot(daily.testing.data$date, daily.testing.data$humidity, type = 'l', ylim = c(0.5,8), main = 'Humidity Prediction', ylab = 'Humidity', xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.h$prediction, type = 'l', col = 'red', ylim = c(0.5,8), ylab = '', cex = 0.6, xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.h$lower, type = 'l', col = 'blue', ylim = c(0.5,8), ylab = '', cex = 0.6, xlab = 'Date')
par(new = T)
plot(daily.testing.data$date, prediction.h$upper, type = 'l', col = 'blue', ylim = c(0.5,8), ylab = '', cex = 0.6, xlab = 'Date')
# We take order of ARMA equal to (4,1,6)
# Find order for GARCH
# AIC grid search over sGARCH(m,n) orders with the mean model fixed.
# NOTE(review): rugarch's armaOrder expects c(p, q); with armaOrder=c(4,1,6)
# only the first two entries are used, so the fitted mean model is ARMA(4,1),
# not the intended ARIMA(4,1,6) -- confirm this is intended.
test_modelAGG <- function(m,n){
spec = ugarchspec(variance.model=list(garchOrder=c(m,n)),
mean.model=list(armaOrder=c(4,1,6),
external.regressors = as.matrix(harmonic.train),
include.mean=T), distribution.model="std")
fit = ugarchfit(spec, daily.pressure.train.ts, solver = 'hybrid')
current.aic = infocriteria(fit)[1]
df = data.frame(m,n,current.aic)
names(df) <- c("m","n","AIC")
return(df)
}
ordersAGG = data.frame(Inf,Inf,Inf)
names(ordersAGG) <- c("m","n","AIC")
# Search m, n in 0..2; tryCatch skips order pairs whose fit fails
for (m in 0:2){
for (n in 0:2){
possibleError <- tryCatch(
ordersAGG<-rbind(ordersAGG,test_modelAGG(m,n)),
error=function(e) e
)
if(inherits(possibleError, "error")) next
}
}
# Sort descending by AIC so tail() shows the best (lowest-AIC) orders
ordersAGG <- ordersAGG[order(-ordersAGG$AIC),]
tail(ordersAGG)
# Update the ARMA order while holding the GARCH order fixed at (1,1).
# (The original header said "GARCH = (2,2)", but the code fits
# garchOrder = c(1,1); the code's behavior is kept and the comment corrected.)
test_modelAGA <- function(p,q){
  spec = ugarchspec(variance.model=list(garchOrder=c(1,1)),
                    mean.model=list(armaOrder=c(p,q),
                                    external.regressors = as.matrix(harmonic.train),
                                    include.mean=T),
                    distribution.model="std")
  fit = ugarchfit(spec, daily.pressure.train.ts, solver = 'hybrid')
  current.aic = infocriteria(fit)[1]
  df = data.frame(p,q,current.aic)
  names(df) <- c("p","q","AIC")
  return(df)
}
ordersAGA = data.frame(Inf,Inf,Inf)
names(ordersAGA) <- c("p","q","AIC")
# FIXES vs. original: removed a stray `1` after the inner `for`, and wrapped
# the fit in tryCatch() so a failed fit skips that (p,q) pair instead of
# aborting the whole grid search (consistent with the other search loops).
for (p in 0:7){
  for (q in 0:7){
    possibleError <- tryCatch(
      ordersAGA<-rbind(ordersAGA,test_modelAGA(p,q)),
      error=function(e) e
    )
    if(inherits(possibleError, "error")) next
  }
}
# Sort descending by AIC so tail() shows the best (lowest-AIC) orders
ordersAGA <- ordersAGA[order(-ordersAGA$AIC),]
tail(ordersAGA)
# Update order for GARCH using ARMA (6,5)
# Re-search sGARCH(m,n) orders with the mean model fixed at the updated
# ARMA(6,5) order found above.
test_modelAGG <- function(m,n){
spec = ugarchspec(variance.model=list(garchOrder=c(m,n)),
mean.model=list(armaOrder=c(6,5),
external.regressors = as.matrix(harmonic.train),
include.mean=T), distribution.model="std")
fit = ugarchfit(spec, daily.pressure.train.ts, solver = 'hybrid')
current.aic = infocriteria(fit)[1]
df = data.frame(m,n,current.aic)
names(df) <- c("m","n","AIC")
return(df)
}
ordersAGG = data.frame(Inf,Inf,Inf)
names(ordersAGG) <- c("m","n","AIC")
# Search m, n in 0..2; tryCatch skips order pairs whose fit fails
for (m in 0:2){
for (n in 0:2){
possibleError <- tryCatch(
ordersAGG<-rbind(ordersAGG,test_modelAGG(m,n)),
error=function(e) e
)
if(inherits(possibleError, "error")) next
}
}
# Sort descending by AIC so tail() shows the best (lowest-AIC) orders
ordersAGG <- ordersAGG[order(-ordersAGG$AIC),]
tail(ordersAGG)
The final iteration shows that the optimal orders for modeling the pressure are (6,5)x(1,1).
# Final pressure model: ARMA(6,5) mean with harmonic external regressors,
# sGARCH(1,1) conditional variance, Student-t innovations.
spec.1 = ugarchspec(variance.model=list(garchOrder=c(1,1)),
mean.model=list(armaOrder=c(6,5),
external.regressors = as.matrix(harmonic.train),
include.mean=T), distribution.model="std")
pressure.arma.garch<-ugarchfit(spec.1,daily.pressure.train.ts,solver='hybrid')
pressure.arma.garch
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(6,0,5)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 989.401851 0.091700 10789.49782 0.000000
## ar1 1.211094 0.000159 7604.32367 0.000000
## ar2 -1.446503 0.000164 -8826.57850 0.000000
## ar3 0.825087 0.000105 7827.93749 0.000000
## ar4 -0.215320 0.000049 -4413.96939 0.000000
## ar5 0.157929 0.000043 3702.87930 0.000000
## ar6 -0.062584 0.000022 -2908.71669 0.000000
## ma1 -0.162302 0.000077 -2096.15922 0.000000
## ma2 0.840706 0.000521 1614.89465 0.000000
## ma3 0.302594 0.000150 2013.04310 0.000000
## ma4 0.089584 0.000144 621.09398 0.000000
## ma5 -0.061357 0.000032 -1891.60261 0.000000
## mxreg1 -0.734180 0.022781 -32.22758 0.000000
## mxreg2 0.098535 0.014506 6.79279 0.000000
## omega 0.403571 1.367313 0.29516 0.767875
## alpha1 0.061882 0.138446 0.44698 0.654893
## beta1 0.920318 0.194140 4.74049 0.000002
## shape 11.774023 2.715305 4.33617 0.000014
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 989.401851 5.348159 184.998592 0.00000
## ar1 1.211094 0.005484 220.832236 0.00000
## ar2 -1.446503 0.006723 -215.171612 0.00000
## ar3 0.825087 0.004088 201.849016 0.00000
## ar4 -0.215320 0.001774 -121.398837 0.00000
## ar5 0.157929 0.001588 99.435319 0.00000
## ar6 -0.062584 0.000719 -86.984137 0.00000
## ma1 -0.162302 0.002120 -76.564650 0.00000
## ma2 0.840706 0.025542 32.914218 0.00000
## ma3 0.302594 0.006169 49.051932 0.00000
## ma4 0.089584 0.006321 14.172753 0.00000
## ma5 -0.061357 0.000838 -73.224680 0.00000
## mxreg1 -0.734180 1.171891 -0.626492 0.53099
## mxreg2 0.098535 0.812618 0.121256 0.90349
## omega 0.403571 66.086606 0.006107 0.99513
## alpha1 0.061882 6.754111 0.009162 0.99269
## beta1 0.920318 9.448194 0.097407 0.92240
## shape 11.774023 75.153613 0.156666 0.87551
##
## LogLikelihood : -8340.529
##
## Information Criteria
## ------------------------------------
##
## Akaike 5.8066
## Bayes 5.8438
## Shibata 5.8065
## Hannan-Quinn 5.8200
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.05628 0.81248
## Lag[2*(p+q)+(p+q)-1][32] 17.36195 0.07254
## Lag[4*(p+q)+(p+q)-1][54] 30.31513 0.22666
## d.o.f=11
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 2.973 0.08467
## Lag[2*(p+q)+(p+q)-1][5] 4.139 0.23722
## Lag[4*(p+q)+(p+q)-1][9] 4.615 0.48804
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.008066 0.500 2.000 0.9284
## ARCH Lag[5] 0.087844 1.440 1.667 0.9890
## ARCH Lag[7] 0.248433 2.315 1.543 0.9952
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 10.8088
## Individual Statistics:
## mu 0.03189
## ar1 0.06254
## ar2 0.03383
## ar3 0.03848
## ar4 0.06025
## ar5 0.01822
## ar6 0.06626
## ma1 0.02106
## ma2 0.04208
## ma3 0.03860
## ma4 0.03483
## ma5 0.04281
## mxreg1 0.03263
## mxreg2 0.03129
## omega 0.04452
## alpha1 0.02901
## beta1 0.04661
## shape 0.11154
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 3.83 4.14 4.73
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 1.1227 0.261668
## Negative Sign Bias 1.6181 0.105754
## Positive Sign Bias 0.1311 0.895674
## Joint Effect 13.0328 0.004566 ***
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 26.34 0.12112
## 2 30 33.69 0.25085
## 3 40 53.14 0.06499
## 4 50 58.88 0.15768
##
##
## Elapsed time : 1.286806
# Residual diagnostics for the fitted pressure ARMA-GARCH model, 2x2 panel:
# (1) residual series over time, (2) ACF of residuals (remaining serial
# correlation), (3) ACF of squared residuals (remaining ARCH effects),
# (4) normal Q-Q plot of residuals.
par(mfrow=c(2,2))
ts.plot(ts(residuals(pressure.arma.garch), frequency = 365, start = c(2009,1)))
acf(residuals(pressure.arma.garch), lag.max = 40)
acf(residuals(pressure.arma.garch)^2, lag.max = 40)
qqnorm(residuals(pressure.arma.garch))
qqline(residuals(pressure.arma.garch))
# Carry over the ARIMA(8,1,4) mean order from the earlier ARMAX analysis
# and grid-search the GARCH order for the temperature series.
# NOTE(review): rugarch's armaOrder expects a length-2 vector c(p, q); the
# original c(8, 1, 4) silently used only the first two entries and fitted
# ARMA(8, 1). We pass c(8, 4) to use the intended AR and MA orders
# (ugarchspec has no integer-differencing slot, so d = 1 is not applied
# here -- TODO confirm whether the series should be differenced first).
test_modelAGG <- function(m, n) {
  # Fit ARMA(8,4) + sGARCH(m,n) with harmonic regressors and Student-t
  # innovations; return a one-row data frame (m, n, AIC).
  spec <- ugarchspec(variance.model = list(garchOrder = c(m, n)),
                     mean.model = list(armaOrder = c(8, 4),
                                       external.regressors = as.matrix(harmonic.train),
                                       include.mean = TRUE),
                     distribution.model = "std")
  fit <- ugarchfit(spec, daily.temp.train.ts, solver = "hybrid")
  df <- data.frame(m, n, infocriteria(fit)[1])
  names(df) <- c("m", "n", "AIC")
  df
}
# Exhaustively try GARCH orders (m, n) in 0..2, skipping fits that error out.
ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)
for (gm in 0:2) {
  for (gn in 0:2) {
    res <- tryCatch(test_modelAGG(gm, gn), error = function(e) NULL)
    if (!is.null(res)) {
      ordersAGG <- rbind(ordersAGG, res)
    }
  }
}
# Sort descending by AIC so the best (lowest-AIC) candidates land in tail().
ordersAGG <- ordersAGG[order(-ordersAGG$AIC), ]
tail(ordersAGG)
# With the GARCH order fixed at (2, 0), re-search the ARMA mean order.
test_modelAGA <- function(p, q) {
  # Fit ARMA(p,q) + sGARCH(2,0) with harmonic regressors and Student-t
  # innovations; return a one-row data frame (p, q, AIC).
  model.spec <- ugarchspec(variance.model = list(garchOrder = c(2, 0)),
                           mean.model = list(armaOrder = c(p, q),
                                             external.regressors = as.matrix(harmonic.train),
                                             include.mean = TRUE),
                           distribution.model = "std")
  model.fit <- ugarchfit(model.spec, daily.temp.train.ts, solver = "hybrid")
  out <- data.frame(p, q, infocriteria(model.fit)[1])
  names(out) <- c("p", "q", "AIC")
  out
}
# Exhaustively try ARMA orders (p, q) in 0..7, skipping fits that error out.
ordersAGA <- data.frame(p = Inf, q = Inf, AIC = Inf)
for (ap in 0:7) {
  for (aq in 0:7) {
    res <- tryCatch(test_modelAGA(ap, aq), error = function(e) NULL)
    if (!is.null(res)) {
      ordersAGA <- rbind(ordersAGA, res)
    }
  }
}
# Sort descending by AIC so the best (lowest-AIC) candidates land in tail().
ordersAGA <- ordersAGA[order(-ordersAGA$AIC), ]
tail(ordersAGA)
# The mean-order search settled on ARMA(6,5); re-search the GARCH order
# under that mean model.
test_modelAGG <- function(m, n) {
  # Fit ARMA(6,5) + sGARCH(m,n) with harmonic regressors and Student-t
  # innovations; return a one-row data frame (m, n, AIC).
  model.spec <- ugarchspec(variance.model = list(garchOrder = c(m, n)),
                           mean.model = list(armaOrder = c(6, 5),
                                             external.regressors = as.matrix(harmonic.train),
                                             include.mean = TRUE),
                           distribution.model = "std")
  model.fit <- ugarchfit(model.spec, daily.temp.train.ts, solver = "hybrid")
  out <- data.frame(m, n, infocriteria(model.fit)[1])
  names(out) <- c("m", "n", "AIC")
  out
}
# Grid-search GARCH orders (m, n) in 0..2 again, ignoring failed fits.
ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)
for (gm in 0:2) {
  for (gn in 0:2) {
    res <- tryCatch(test_modelAGG(gm, gn), error = function(e) NULL)
    if (!is.null(res)) {
      ordersAGG <- rbind(ordersAGG, res)
    }
  }
}
# Best (lowest-AIC) candidates are shown by tail() after a descending sort.
ordersAGG <- ordersAGG[order(-ordersAGG$AIC), ]
tail(ordersAGG)
# With the GARCH order now fixed at (1, 1), re-search the ARMA mean order.
test_modelAGA <- function(p, q) {
  # Fit ARMA(p,q) + sGARCH(1,1) with harmonic regressors and Student-t
  # innovations; return a one-row data frame (p, q, AIC).
  model.spec <- ugarchspec(variance.model = list(garchOrder = c(1, 1)),
                           mean.model = list(armaOrder = c(p, q),
                                             external.regressors = as.matrix(harmonic.train),
                                             include.mean = TRUE),
                           distribution.model = "std")
  model.fit <- ugarchfit(model.spec, daily.temp.train.ts, solver = "hybrid")
  out <- data.frame(p, q, infocriteria(model.fit)[1])
  names(out) <- c("p", "q", "AIC")
  out
}
# Grid-search ARMA orders (p, q) in 0..7 under GARCH(1,1), ignoring failures.
ordersAGA <- data.frame(p = Inf, q = Inf, AIC = Inf)
for (ap in 0:7) {
  for (aq in 0:7) {
    res <- tryCatch(test_modelAGA(ap, aq), error = function(e) NULL)
    if (!is.null(res)) {
      ordersAGA <- rbind(ordersAGA, res)
    }
  }
}
# Best (lowest-AIC) candidates are shown by tail() after a descending sort.
ordersAGA <- ordersAGA[order(-ordersAGA$AIC), ]
tail(ordersAGA)
The final iteration shows that the optimal orders for modeling temperature are ARMA(6,5) x GARCH(1,1).
# Final temperature model: ARMA(6,5) mean with harmonic exogenous regressors,
# sGARCH(1,1) conditional variance, Student-t innovations ("std").
spec.2 = ugarchspec(variance.model=list(garchOrder=c(1,1)),
mean.model=list(armaOrder=c(6,5),
external.regressors = as.matrix(harmonic.train),
include.mean=T), distribution.model="std")
# 'hybrid' solver retries alternative optimizers if the first fails to converge.
temp.arma.garch<-ugarchfit(spec.2,daily.temp.train.ts,solver='hybrid')
# Print the full fit summary (coefficients, diagnostics, stability tests).
temp.arma.garch
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(6,0,5)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 271.263886 1.427893 189.97493 0.000000
## ar1 0.569273 0.033366 17.06132 0.000000
## ar2 -0.021357 0.072262 -0.29555 0.767570
## ar3 0.700076 0.065037 10.76426 0.000000
## ar4 0.097627 0.052148 1.87211 0.061192
## ar5 -0.468455 0.072153 -6.49249 0.000000
## ar6 0.120216 0.013258 9.06764 0.000000
## ma1 0.434493 0.038666 11.23705 0.000000
## ma2 0.230926 0.033817 6.82871 0.000000
## ma3 -0.522068 0.037897 -13.77608 0.000000
## ma4 -0.659268 0.017992 -36.64278 0.000000
## ma5 -0.031916 0.018895 -1.68912 0.091196
## mxreg1 -1.278284 0.677050 -1.88802 0.059023
## mxreg2 -1.251881 0.794864 -1.57496 0.115265
## omega 0.951081 0.250829 3.79175 0.000150
## alpha1 0.107496 0.020741 5.18288 0.000000
## beta1 0.739703 0.053418 13.84742 0.000000
## shape 21.331965 7.005240 3.04514 0.002326
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 271.263886 2.114332 128.29767 0.000000
## ar1 0.569273 0.042284 13.46296 0.000000
## ar2 -0.021357 0.068728 -0.31075 0.755987
## ar3 0.700076 0.089535 7.81905 0.000000
## ar4 0.097627 0.066408 1.47010 0.141535
## ar5 -0.468455 0.074539 -6.28471 0.000000
## ar6 0.120216 0.057499 2.09075 0.036551
## ma1 0.434493 0.046062 9.43280 0.000000
## ma2 0.230926 0.049612 4.65461 0.000003
## ma3 -0.522068 0.072440 -7.20687 0.000000
## ma4 -0.659268 0.060327 -10.92817 0.000000
## ma5 -0.031916 0.044842 -0.71174 0.476627
## mxreg1 -1.278284 0.830991 -1.53826 0.123984
## mxreg2 -1.251881 1.228604 -1.01895 0.308229
## omega 0.951081 0.269001 3.53560 0.000407
## alpha1 0.107496 0.021602 4.97617 0.000001
## beta1 0.739703 0.057360 12.89573 0.000000
## shape 21.331965 8.097314 2.63445 0.008427
##
## LogLikelihood : -6663.193
##
## Information Criteria
## ------------------------------------
##
## Akaike 4.6413
## Bayes 4.6786
## Shibata 4.6413
## Hannan-Quinn 4.6548
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.07248 0.7878
## Lag[2*(p+q)+(p+q)-1][32] 14.17249 1.0000
## Lag[4*(p+q)+(p+q)-1][54] 30.74302 0.1969
## d.o.f=11
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.641 0.4233
## Lag[2*(p+q)+(p+q)-1][5] 4.702 0.1786
## Lag[4*(p+q)+(p+q)-1][9] 6.769 0.2193
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 2.001 0.500 2.000 0.1572
## ARCH Lag[5] 2.900 1.440 1.667 0.3047
## ARCH Lag[7] 4.291 2.315 1.543 0.3066
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 3.1633
## Individual Statistics:
## mu 0.004208
## ar1 0.118600
## ar2 0.048165
## ar3 0.027247
## ar4 0.098318
## ar5 0.101603
## ar6 0.026675
## ma1 0.521380
## ma2 0.399189
## ma3 1.065975
## ma4 0.057628
## ma5 1.385689
## mxreg1 0.033016
## mxreg2 0.078751
## omega 0.234777
## alpha1 0.189449
## beta1 0.218194
## shape 0.063735
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 3.83 4.14 4.73
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.97856 0.3279
## Negative Sign Bias 0.09711 0.9226
## Positive Sign Bias 0.11298 0.9101
## Joint Effect 2.43540 0.4871
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 24.79 0.1675
## 2 30 33.29 0.2661
## 3 40 34.44 0.6777
## 4 50 46.72 0.5660
##
##
## Elapsed time : 1.199471
# Residual diagnostics for the fitted temperature ARMA-GARCH model, 2x2 panel:
# residual series, ACF of residuals, ACF of squared residuals, normal Q-Q plot.
par(mfrow=c(2,2))
ts.plot(ts(residuals(temp.arma.garch), frequency = 365, start = c(2009,1)))
acf(residuals(temp.arma.garch), lag.max = 40)
acf(residuals(temp.arma.garch)^2, lag.max = 40)
qqnorm(residuals(temp.arma.garch))
qqline(residuals(temp.arma.garch))
# Humidity: start the search from the ARMA(6,5) mean order selected for
# temperature, and grid-search the GARCH order first.
test_modelAGG <- function(m, n) {
  # Fit ARMA(6,5) + sGARCH(m,n) on daily humidity with harmonic regressors
  # and Student-t innovations; return a one-row data frame (m, n, AIC).
  model.spec <- ugarchspec(variance.model = list(garchOrder = c(m, n)),
                           mean.model = list(armaOrder = c(6, 5),
                                             external.regressors = as.matrix(harmonic.train),
                                             include.mean = TRUE),
                           distribution.model = "std")
  model.fit <- ugarchfit(model.spec, daily.humidity.train.ts, solver = "hybrid")
  out <- data.frame(m, n, infocriteria(model.fit)[1])
  names(out) <- c("m", "n", "AIC")
  out
}
# Grid-search humidity GARCH orders (m, n) in 0..2, ignoring failed fits.
ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)
for (gm in 0:2) {
  for (gn in 0:2) {
    res <- tryCatch(test_modelAGG(gm, gn), error = function(e) NULL)
    if (!is.null(res)) {
      ordersAGG <- rbind(ordersAGG, res)
    }
  }
}
# Best (lowest-AIC) candidates are shown by tail() after a descending sort.
ordersAGG <- ordersAGG[order(-ordersAGG$AIC), ]
tail(ordersAGG)
# With the humidity GARCH order fixed at (2, 2), re-search the ARMA order.
test_modelAGA <- function(p, q) {
  # Fit ARMA(p,q) + sGARCH(2,2) on daily humidity with harmonic regressors
  # and Student-t innovations; return a one-row data frame (p, q, AIC).
  model.spec <- ugarchspec(variance.model = list(garchOrder = c(2, 2)),
                           mean.model = list(armaOrder = c(p, q),
                                             external.regressors = as.matrix(harmonic.train),
                                             include.mean = TRUE),
                           distribution.model = "std")
  model.fit <- ugarchfit(model.spec, daily.humidity.train.ts, solver = "hybrid")
  out <- data.frame(p, q, infocriteria(model.fit)[1])
  names(out) <- c("p", "q", "AIC")
  out
}
# Grid-search humidity ARMA orders (p, q) in 0..7, ignoring failed fits.
ordersAGA <- data.frame(p = Inf, q = Inf, AIC = Inf)
for (ap in 0:7) {
  for (aq in 0:7) {
    res <- tryCatch(test_modelAGA(ap, aq), error = function(e) NULL)
    if (!is.null(res)) {
      ordersAGA <- rbind(ordersAGA, res)
    }
  }
}
# Best (lowest-AIC) candidates are shown by tail() after a descending sort.
ordersAGA <- ordersAGA[order(-ordersAGA$AIC), ]
tail(ordersAGA)
# The humidity mean-order search settled on ARMA(4,4); re-check the GARCH
# order under that mean model.
test_modelAGG <- function(m, n) {
  # Fit ARMA(4,4) + sGARCH(m,n) on daily humidity with harmonic regressors
  # and Student-t innovations; return a one-row data frame (m, n, AIC).
  model.spec <- ugarchspec(variance.model = list(garchOrder = c(m, n)),
                           mean.model = list(armaOrder = c(4, 4),
                                             external.regressors = as.matrix(harmonic.train),
                                             include.mean = TRUE),
                           distribution.model = "std")
  model.fit <- ugarchfit(model.spec, daily.humidity.train.ts, solver = "hybrid")
  out <- data.frame(m, n, infocriteria(model.fit)[1])
  names(out) <- c("m", "n", "AIC")
  out
}
# Final grid-search of humidity GARCH orders (m, n) in 0..2.
ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)
for (gm in 0:2) {
  for (gn in 0:2) {
    res <- tryCatch(test_modelAGG(gm, gn), error = function(e) NULL)
    if (!is.null(res)) {
      ordersAGG <- rbind(ordersAGG, res)
    }
  }
}
# Best (lowest-AIC) candidates are shown by tail() after a descending sort.
ordersAGG <- ordersAGG[order(-ordersAGG$AIC), ]
tail(ordersAGG)
The final iteration shows that the optimal orders for modeling humidity are ARMA(4,4) x GARCH(2,2).
# Final humidity model: ARMA(4,4) mean with harmonic exogenous regressors,
# sGARCH(2,2) conditional variance, Student-t innovations ("std").
spec.3 = ugarchspec(variance.model=list(garchOrder=c(2,2)),
mean.model=list(armaOrder=c(4,4),
external.regressors = as.matrix(harmonic.train),
include.mean=T),
distribution.model="std")
# 'hybrid' solver retries alternative optimizers if the first fails to converge.
hum.arma.garch<-ugarchfit(spec.3,daily.humidity.train.ts,solver='hybrid')
# Print the full fit summary (coefficients, diagnostics, stability tests).
hum.arma.garch
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(2,2)
## Mean Model : ARFIMA(4,0,4)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 3.500533 0.329996 10.60780 0.000000
## ar1 0.308159 0.018164 16.96554 0.000000
## ar2 0.537184 0.013391 40.11657 0.000000
## ar3 0.747056 0.011639 64.18771 0.000000
## ar4 -0.598562 0.018233 -32.82787 0.000000
## ma1 0.661218 0.009522 69.43926 0.000000
## ma2 -0.130586 0.003378 -38.66121 0.000000
## ma3 -0.973952 0.000099 -9821.81974 0.000000
## ma4 -0.251958 0.010655 -23.64791 0.000000
## mxreg1 -0.742382 0.200171 -3.70874 0.000208
## mxreg2 -0.752662 0.187394 -4.01647 0.000059
## omega 0.020951 0.007805 2.68427 0.007269
## alpha1 0.092844 0.016580 5.59990 0.000000
## alpha2 0.026643 0.017224 1.54688 0.121892
## beta1 0.027768 0.051735 0.53673 0.591456
## beta2 0.832322 0.047072 17.68187 0.000000
## shape 8.794568 1.477189 5.95359 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 3.500533 0.355593 9.8442e+00 0.000000
## ar1 0.308159 0.018952 1.6260e+01 0.000000
## ar2 0.537184 0.012301 4.3671e+01 0.000000
## ar3 0.747056 0.010426 7.1656e+01 0.000000
## ar4 -0.598562 0.020013 -2.9909e+01 0.000000
## ma1 0.661218 0.009253 7.1461e+01 0.000000
## ma2 -0.130586 0.003134 -4.1661e+01 0.000000
## ma3 -0.973952 0.000085 -1.1491e+04 0.000000
## ma4 -0.251958 0.011229 -2.2438e+01 0.000000
## mxreg1 -0.742382 0.204004 -3.6391e+00 0.000274
## mxreg2 -0.752662 0.208267 -3.6139e+00 0.000302
## omega 0.020951 0.008376 2.5013e+00 0.012374
## alpha1 0.092844 0.016135 5.7544e+00 0.000000
## alpha2 0.026643 0.016852 1.5810e+00 0.113886
## beta1 0.027768 0.039358 7.0552e-01 0.480486
## beta2 0.832322 0.036042 2.3093e+01 0.000000
## shape 8.794568 1.239269 7.0966e+00 0.000000
##
## LogLikelihood : -3693.831
##
## Information Criteria
## ------------------------------------
##
## Akaike 2.5779
## Bayes 2.6131
## Shibata 2.5778
## Hannan-Quinn 2.5906
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 9.406e-04 0.9755
## Lag[2*(p+q)+(p+q)-1][23] 9.075e+00 1.0000
## Lag[4*(p+q)+(p+q)-1][39] 1.820e+01 0.6924
## d.o.f=8
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 1.042 0.3074
## Lag[2*(p+q)+(p+q)-1][11] 5.425 0.5171
## Lag[4*(p+q)+(p+q)-1][19] 9.345 0.5226
## d.o.f=4
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[5] 0.8100 0.500 2.000 0.3681
## ARCH Lag[7] 0.9807 1.473 1.746 0.7628
## ARCH Lag[9] 1.3682 2.402 1.619 0.8748
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 1.4089
## Individual Statistics:
## mu 0.06731
## ar1 0.04072
## ar2 0.03630
## ar3 0.02752
## ar4 0.05690
## ma1 0.03421
## ma2 0.02551
## ma3 0.03256
## ma4 0.02783
## mxreg1 0.07684
## mxreg2 0.05602
## omega 0.05091
## alpha1 0.03350
## alpha2 0.05311
## beta1 0.03857
## beta2 0.03708
## shape 0.14983
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 3.64 3.95 4.51
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 2.143 3.223e-02 **
## Negative Sign Bias 0.906 3.650e-01
## Positive Sign Bias 1.221 2.221e-01
## Joint Effect 27.469 4.694e-06 ***
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 37.84 0.006221
## 2 30 57.51 0.001248
## 3 40 63.34 0.008155
## 4 50 74.68 0.010495
##
##
## Elapsed time : 0.8980579
# Residual diagnostics for the fitted humidity ARMA-GARCH model, 2x2 panel:
# residual series, ACF of residuals, ACF of squared residuals, normal Q-Q plot.
par(mfrow=c(2,2))
ts.plot(ts(residuals(hum.arma.garch), frequency = 365, start = c(2009,1)))
acf(residuals(hum.arma.garch), lag.max = 40)
acf(residuals(hum.arma.garch)^2, lag.max = 40)
qqnorm(residuals(hum.arma.garch))
qqline(residuals(hum.arma.garch))
# Rolling 6-week out-of-sample evaluation of the three ARMA-GARCH models:
# each iteration i refits on an expanding window (training data plus the
# first (i-1)*7 test days) and forecasts the next 7 days, giving 6*7 = 42
# forecasts matching the test set. Each data frame is seeded with a single
# Inf row (removed after the loop) so rbind() has a target to append to.
prediction.p <- data.frame(Inf, Inf, Inf)
names(prediction.p) <- c('prediction', 'lower', 'upper')
prediction.t <- data.frame(Inf, Inf, Inf)
names(prediction.t) <- c('prediction', 'lower', 'upper')
prediction.h <- data.frame(Inf, Inf, Inf)
names(prediction.h) <- c('prediction', 'lower', 'upper')
for (i in 1:6) {
# Expanding window: week i sees all test days from earlier weeks.
data <- daily.training.data
if (i >= 2) {
data <- rbind(data, daily.testing.data[c(1:((i - 1)*7)),])
}
# Pressure: ARMA(6,5) x GARCH(1,1), harmonic regressors truncated to the
# current window length.
# NOTE(review): no external.forecasts are passed to ugarchforecast below,
# so the regressor values used over the 7-day horizon should be confirmed
# against rugarch's documented behavior for mexdata.
spec.p = ugarchspec(variance.model=list(garchOrder=c(1,1)),
mean.model=list(armaOrder=c(6,5),
external.regressors = as.matrix(harmonic.all[c(1:nrow(data)),]),
include.mean=T),
distribution.model="std")
arma.garch.p <-ugarchfit(spec.p, data$pressure,solver='hybrid')
fore <- ugarchforecast(arma.garch.p, n.ahead = 7)
fore <- fore@forecast
# 95% band from mean forecast +/- 1.96 * conditional sigma.
# NOTE(review): 1.96 is the normal quantile; the fits use Student-t
# innovations, so this band is approximate -- confirm if exact coverage
# is needed.
temp <- data.frame(fore$seriesFor,
fore$seriesFor - 1.96 * fore$sigmaFor,
fore$seriesFor + 1.96 * fore$sigmaFor)
names(temp) <- c('prediction', 'lower', 'upper')
prediction.p <- rbind(prediction.p, temp)
# Temperature: same (6,5) x (1,1) specification.
spec.t = ugarchspec(variance.model=list(garchOrder=c(1,1)),
mean.model=list(armaOrder=c(6,5),
external.regressors = as.matrix(harmonic.all[c(1:nrow(data)),]),
include.mean=T),
distribution.model="std")
arma.garch.t <-ugarchfit(spec.t, data$temperature,solver='hybrid')
fore <- ugarchforecast(arma.garch.t, n.ahead = 7)
fore <- fore@forecast
temp <- data.frame('prediction' = fore$seriesFor,
'lower' = fore$seriesFor - 1.96 * fore$sigmaFor,
'upper' = fore$seriesFor + 1.96 * fore$sigmaFor)
names(temp) <- c('prediction', 'lower', 'upper')
prediction.t <- rbind(prediction.t, temp)
# Humidity: ARMA(4,4) x GARCH(2,2) as selected above.
spec.h = ugarchspec(variance.model=list(garchOrder=c(2,2)),
mean.model=list(armaOrder=c(4,4),
external.regressors = as.matrix(harmonic.all[c(1:nrow(data)),]),
include.mean=T),
distribution.model="std")
arma.garch.h <-ugarchfit(spec.h, data$humidity,solver='hybrid')
fore <- ugarchforecast(arma.garch.h, n.ahead = 7)
fore <- fore@forecast
temp <- data.frame('prediction' = fore$seriesFor,
'lower' = fore$seriesFor - 1.96 * fore$sigmaFor,
'upper' = fore$seriesFor + 1.96 * fore$sigmaFor)
names(temp) <- c('prediction', 'lower', 'upper')
prediction.h <- rbind(prediction.h, temp)
}
# Drop the Inf seed row and attach the 42 test-set dates to each forecast
# data frame.
prediction.p <- prediction.p[-1,]
prediction.p$date <- daily.testing.data$date
prediction.t <- prediction.t[-1,]
prediction.t$date <- daily.testing.data$date
prediction.h <- prediction.h[-1,]
prediction.h$date <- daily.testing.data$date
# Cache the rolling ARMA-GARCH forecasts so the expensive refits need not
# be rerun; order of the list: [[1]] pressure, [[2]] temperature,
# [[3]] humidity.
arma.garch.prediction <- list(prediction.p, prediction.t, prediction.h)
save(arma.garch.prediction, file = 'arma-garch-prediction.RData')
load('arma-garch-prediction.RData')
prediction.p <- arma.garch.prediction[[1]]
prediction.t <- arma.garch.prediction[[2]]
prediction.h <- arma.garch.prediction[[3]]
# ARMA-GARCH forecasts vs observations: observation (black), point forecast
# (red), approximate 95% interval (blue). Overlays use lines() rather than
# the original par(new = TRUE) + plot() pattern, which re-drew the axes on
# every call.
plot(daily.testing.data$date, daily.testing.data$pressure, type = 'l', ylim = c(970,1020),
     main = 'Pressure Prediction', ylab = 'Pressure', xlab = 'Date')
lines(daily.testing.data$date, prediction.p$prediction, col = 'red')
lines(daily.testing.data$date, prediction.p$lower, col = 'blue')
lines(daily.testing.data$date, prediction.p$upper, col = 'blue')
# Temperature forecast vs observation.
plot(daily.testing.data$date, daily.testing.data$temperature, type = 'l', ylim = c(264,289),
     main = 'Temperature Prediction', ylab = 'Temperature', xlab = 'Date')
lines(daily.testing.data$date, prediction.t$prediction, col = 'red')
lines(daily.testing.data$date, prediction.t$lower, col = 'blue')
lines(daily.testing.data$date, prediction.t$upper, col = 'blue')
# Humidity forecast vs observation.
plot(daily.testing.data$date, daily.testing.data$humidity, type = 'l', ylim = c(0.5,8),
     main = 'Humidity Prediction', ylab = 'Humidity', xlab = 'Date')
lines(daily.testing.data$date, prediction.h$prediction, col = 'red')
lines(daily.testing.data$date, prediction.h$lower, col = 'blue')
lines(daily.testing.data$date, prediction.h$upper, col = 'blue')
# VAR modeling
# Multivariate training/testing matrices. Name the columns of BOTH matrices:
# the original named only train.data, leaving test.data with the raw ts
# object names -- a latent inconsistency for any code that indexes test.data
# by column name.
train.data <- cbind(daily.pressure.train.ts, daily.temp.train.ts, daily.humidity.train.ts)
test.data <- cbind(daily.pressure.test.ts, daily.temp.test.ts, daily.humidity.test.ts)
colnames(train.data) <- c("pressure", "temperature", "humidity")
colnames(test.data) <- c("pressure", "temperature", "humidity")
# Select the VAR lag order by information criteria; type = "both" includes
# a constant and a linear trend, with harmonic exogenous regressors.
var.select <- VARselect(train.data, exogen = harmonic.train, type = "both")
var.select$selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 4 3 3 4
# HQ and SC prefer p = 3; fit VAR(3) with the same deterministic terms.
var1 <- VAR(train.data, p = 3, exogen = harmonic.train, type = "both")
summary(var1)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: pressure, temperature, humidity
## Deterministic variables: both
## Sample size: 2876
## Log Likelihood: -17670.276
## Roots of the characteristic polynomial:
## 0.8093 0.7514 0.667 0.4092 0.4092 0.3992 0.3992 0.2819 0.2819
## Call:
## VAR(y = train.data, p = 3, type = "both", exogen = harmonic.train)
##
##
## Estimation results for equation pressure:
## =========================================
## pressure = pressure.l1 + temperature.l1 + humidity.l1 + pressure.l2 + temperature.l2 + humidity.l2 + pressure.l3 + temperature.l3 + humidity.l3 + const + trend + exo1 + exo2
##
## Estimate Std. Error t value Pr(>|t|)
## pressure.l1 1.054e+00 2.058e-02 51.218 < 2e-16 ***
## temperature.l1 -1.804e-01 4.883e-02 -3.694 0.000225 ***
## humidity.l1 8.324e-01 1.272e-01 6.542 7.16e-11 ***
## pressure.l2 -4.156e-01 2.948e-02 -14.097 < 2e-16 ***
## temperature.l2 5.542e-02 6.278e-02 0.883 0.377425
## humidity.l2 -5.627e-01 1.599e-01 -3.518 0.000441 ***
## pressure.l3 1.391e-01 2.059e-02 6.755 1.72e-11 ***
## temperature.l3 5.033e-02 4.924e-02 1.022 0.306826
## humidity.l3 5.591e-02 1.272e-01 0.439 0.660402
## const 2.391e+02 1.746e+01 13.692 < 2e-16 ***
## trend 1.409e-04 1.058e-04 1.331 0.183196
## exo1 1.458e-01 1.879e-01 0.776 0.437845
## exo2 1.302e-02 2.184e-01 0.060 0.952466
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 4.574 on 2863 degrees of freedom
## Multiple R-Squared: 0.6748, Adjusted R-squared: 0.6734
## F-statistic: 495.1 on 12 and 2863 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation temperature:
## ============================================
## temperature = pressure.l1 + temperature.l1 + humidity.l1 + pressure.l2 + temperature.l2 + humidity.l2 + pressure.l3 + temperature.l3 + humidity.l3 + const + trend + exo1 + exo2
##
## Estimate Std. Error t value Pr(>|t|)
## pressure.l1 0.0417598 0.0108674 3.843 0.000124 ***
## temperature.l1 0.9599952 0.0257901 37.223 < 2e-16 ***
## humidity.l1 0.0066496 0.0671988 0.099 0.921182
## pressure.l2 0.0358979 0.0155684 2.306 0.021191 *
## temperature.l2 -0.1480294 0.0331556 -4.465 8.33e-06 ***
## humidity.l2 -0.1183646 0.0844662 -1.401 0.161225
## pressure.l3 -0.0416479 0.0108743 -3.830 0.000131 ***
## temperature.l3 0.0555683 0.0260067 2.137 0.032708 *
## humidity.l3 -0.0951040 0.0672013 -1.415 0.157115
## const 2.9812421 9.2208031 0.323 0.746479
## trend 0.0001385 0.0000559 2.478 0.013269 *
## exo1 -0.9534103 0.0992193 -9.609 < 2e-16 ***
## exo2 -1.5338899 0.1153211 -13.301 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 2.416 on 2863 degrees of freedom
## Multiple R-Squared: 0.9062, Adjusted R-squared: 0.9058
## F-statistic: 2306 on 12 and 2863 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation humidity:
## =========================================
## humidity = pressure.l1 + temperature.l1 + humidity.l1 + pressure.l2 + temperature.l2 + humidity.l2 + pressure.l3 + temperature.l3 + humidity.l3 + const + trend + exo1 + exo2
##
## Estimate Std. Error t value Pr(>|t|)
## pressure.l1 1.826e-02 3.992e-03 4.575 4.96e-06 ***
## temperature.l1 9.198e-02 9.474e-03 9.709 < 2e-16 ***
## humidity.l1 7.787e-01 2.468e-02 31.544 < 2e-16 ***
## pressure.l2 2.978e-04 5.719e-03 0.052 0.958481
## temperature.l2 -3.556e-03 1.218e-02 -0.292 0.770309
## humidity.l2 -2.635e-01 3.103e-02 -8.492 < 2e-16 ***
## pressure.l3 -8.311e-03 3.995e-03 -2.081 0.037565 *
## temperature.l3 -3.575e-02 9.553e-03 -3.742 0.000186 ***
## humidity.l3 1.205e-01 2.469e-02 4.883 1.10e-06 ***
## const -2.293e+01 3.387e+00 -6.771 1.55e-11 ***
## trend 3.695e-05 2.053e-05 1.799 0.072077 .
## exo1 -4.314e-01 3.645e-02 -11.835 < 2e-16 ***
## exo2 -4.130e-01 4.236e-02 -9.750 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.8874 on 2863 degrees of freedom
## Multiple R-Squared: 0.8826, Adjusted R-squared: 0.8821
## F-statistic: 1794 on 12 and 2863 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## pressure temperature humidity
## pressure 20.923 -4.692 -1.3364
## temperature -4.692 5.835 1.4163
## humidity -1.336 1.416 0.7874
##
## Correlation matrix of residuals:
## pressure temperature humidity
## pressure 1.0000 -0.4246 -0.3293
## temperature -0.4246 1.0000 0.6607
## humidity -0.3293 0.6607 1.0000
# Rolling 6-week out-of-sample evaluation of the VARX model, mirroring the
# ARMA-GARCH loop: each iteration refits VAR(3) on an expanding window and
# produces a 7-day-ahead forecast with 95% intervals (42 forecasts total).
# Each data frame is seeded with an Inf row that is removed after the loop.
prediction.p <- data.frame(Inf, Inf, Inf)
names(prediction.p) <- c('fcst', 'lower', 'upper')
prediction.t <- data.frame(Inf, Inf, Inf)
names(prediction.t) <- c('fcst', 'lower', 'upper')
prediction.h <- data.frame(Inf, Inf, Inf)
names(prediction.h) <- c('fcst', 'lower', 'upper')
for (i in 1:6) {
# Expanding window: append the test rows already "seen" in earlier weeks.
data <- train.data
if (i >= 2) {
data <- rbind(data, test.data[c(1:((i - 1)*7)),])
}
# In-sample exogenous harmonics, plus the harmonics for the 7 forecast days
# (passed to predict() via dumvar).
exo <- harmonic.all[1:nrow(data),]
new.exo <- harmonic.test[(1 + (i - 1) * 7):(i * 7),]
varx<-VAR(data,p=3, exogen=exo, type="both")
pred <- predict(varx, n.ahead = 7, ci = 0.95, dumvar = new.exo)
pred <- pred$fcst
# Columns 1:3 of each fcst matrix are: point forecast, lower, upper.
prediction.p <- rbind(prediction.p, pred$pressure[,1:3])
prediction.t <- rbind(prediction.t, pred$temperature[,1:3])
prediction.h <- rbind(prediction.h, pred$humidity[,1:3])
}
# Drop the Inf seed row and attach the 42 test-set dates to each forecast
# data frame.
prediction.p <- prediction.p[-1,]
prediction.p$date <- daily.testing.data$date
prediction.t <- prediction.t[-1,]
prediction.t$date <- daily.testing.data$date
prediction.h <- prediction.h[-1,]
prediction.h$date <- daily.testing.data$date
# Cache the rolling VARX forecasts; order of the list: [[1]] pressure,
# [[2]] temperature, [[3]] humidity.
varx.prediction <- list(prediction.p, prediction.t, prediction.h)
save(varx.prediction, file = 'varx-prediction.RData')
load('varx-prediction.RData')
prediction.p <- varx.prediction[[1]]
prediction.t <- varx.prediction[[2]]
prediction.h <- varx.prediction[[3]]
# VARX forecasts vs observations: observation (black), point forecast (red),
# 95% interval (blue). Overlays use lines() rather than the original
# par(new = TRUE) + plot() pattern, which re-drew the axes on every call.
plot(daily.testing.data$date, daily.testing.data$pressure, type = 'l', ylim = c(970,1020),
     main = 'Pressure Prediction', ylab = 'Pressure', xlab = 'Date')
lines(daily.testing.data$date, prediction.p$fcst, col = 'red')
lines(daily.testing.data$date, prediction.p$lower, col = 'blue')
lines(daily.testing.data$date, prediction.p$upper, col = 'blue')
# Temperature forecast vs observation.
plot(daily.testing.data$date, daily.testing.data$temperature, type = 'l', ylim = c(264,289),
     main = 'Temperature Prediction', ylab = 'Temperature', xlab = 'Date')
lines(daily.testing.data$date, prediction.t$fcst, col = 'red')
lines(daily.testing.data$date, prediction.t$lower, col = 'blue')
lines(daily.testing.data$date, prediction.t$upper, col = 'blue')
# Humidity forecast vs observation.
plot(daily.testing.data$date, daily.testing.data$humidity, type = 'l', ylim = c(0.5,8),
     main = 'Humidity Prediction', ylab = 'Humidity', xlab = 'Date')
lines(daily.testing.data$date, prediction.h$fcst, col = 'red')
lines(daily.testing.data$date, prediction.h$lower, col = 'blue')
lines(daily.testing.data$date, prediction.h$upper, col = 'blue')
Results: the moduli of the roots of the characteristic polynomial are all less than 1, which indicates that our VAR model is stable.
Looking at the output of the models for all three weather components we conclude the following: the trend term is statistically significant only in the temperature equation (and only marginally for humidity), not for pressure; the harmonic seasonality coefficients are not statistically significant in the pressure equation, but they are in the temperature and humidity equations; for pressure: significant lead-lag relationships with temperature at lag 1 and with humidity at lags 1 and 2, and with its own past values at lags 1, 2 and 3; for temperature: significant lead-lag relationships with pressure at lags 1, 2 and 3, none with humidity, and with its own past values at lags 1, 2 and 3; for humidity: significant lead-lag relationships with pressure at lags 1 and 3, with temperature at lags 1 and 3, and with its own past values at lags 1, 2 and 3.
# Re-estimate the VAR after dropping coefficients whose |t| falls below
# restrict()'s default threshold (t-statistic-based restriction).
var.restrict.model=restrict(var1)
summary(var.restrict.model)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: pressure, temperature, humidity
## Deterministic variables: both
## Sample size: 2876
## Log Likelihood: -17679.725
## Roots of the characteristic polynomial:
## 0.793 0.7476 0.6734 0.4205 0.4205 0.3401 0.3401 0.2352 0.2352
## Call:
## VAR(y = train.data, p = 3, type = "both", exogen = harmonic.train)
##
##
## Estimation results for equation pressure:
## =========================================
## pressure = pressure.l1 + temperature.l1 + humidity.l1 + pressure.l2 + humidity.l2 + pressure.l3 + temperature.l3 + const
##
## Estimate Std. Error t value Pr(>|t|)
## pressure.l1 1.05763 0.01981 53.390 < 2e-16 ***
## temperature.l1 -0.14968 0.03557 -4.208 2.66e-05 ***
## humidity.l1 0.78140 0.12026 6.498 9.58e-11 ***
## pressure.l2 -0.42371 0.02688 -15.763 < 2e-16 ***
## humidity.l2 -0.49051 0.11311 -4.337 1.50e-05 ***
## pressure.l3 0.14443 0.01940 7.445 1.27e-13 ***
## temperature.l3 0.08022 0.03088 2.598 0.00943 **
## const 237.15317 15.34800 15.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 4.573 on 2868 degrees of freedom
## Multiple R-Squared: 1, Adjusted R-squared: 1
## F-statistic: 1.682e+07 on 8 and 2868 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation temperature:
## ============================================
## temperature = pressure.l1 + temperature.l1 + pressure.l2 + temperature.l2 + humidity.l2 + pressure.l3 + trend + exo1 + exo2
##
## Estimate Std. Error t value Pr(>|t|)
## pressure.l1 4.585e-02 1.013e-02 4.526 6.25e-06 ***
## temperature.l1 9.613e-01 2.013e-02 47.763 < 2e-16 ***
## pressure.l2 3.922e-02 1.475e-02 2.659 0.00788 **
## temperature.l2 -9.689e-02 2.240e-02 -4.326 1.57e-05 ***
## humidity.l2 -1.919e-01 4.392e-02 -4.370 1.29e-05 ***
## pressure.l3 -4.524e-02 9.615e-03 -4.705 2.66e-06 ***
## trend 1.354e-04 5.517e-05 2.454 0.01420 *
## exo1 -9.405e-01 9.209e-02 -10.213 < 2e-16 ***
## exo2 -1.523e+00 1.025e-01 -14.868 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 2.416 on 2867 degrees of freedom
## Multiple R-Squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 4.406e+06 on 9 and 2867 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation humidity:
## =========================================
## humidity = pressure.l1 + temperature.l1 + humidity.l1 + humidity.l2 + pressure.l3 + temperature.l3 + humidity.l3 + const + exo1 + exo2
##
## Estimate Std. Error t value Pr(>|t|)
## pressure.l1 0.018813 0.002659 7.076 1.86e-12 ***
## temperature.l1 0.091166 0.007423 12.281 < 2e-16 ***
## humidity.l1 0.781483 0.023442 33.337 < 2e-16 ***
## humidity.l2 -0.270103 0.024509 -11.021 < 2e-16 ***
## pressure.l3 -0.008014 0.002693 -2.976 0.00294 **
## temperature.l3 -0.036781 0.007514 -4.895 1.04e-06 ***
## humidity.l3 0.124404 0.022485 5.533 3.44e-08 ***
## const -23.907760 3.341197 -7.155 1.05e-12 ***
## exo1 -0.425631 0.036282 -11.731 < 2e-16 ***
## exo2 -0.399803 0.041724 -9.582 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.8874 on 2866 degrees of freedom
## Multiple R-Squared: 0.9819, Adjusted R-squared: 0.9818
## F-statistic: 1.555e+04 on 10 and 2866 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## pressure temperature humidity
## pressure 20.948 -4.698 -1.3334
## temperature -4.698 5.847 1.4167
## humidity -1.333 1.417 0.7883
##
## Correlation matrix of residuals:
## pressure temperature humidity
## pressure 1.0000 -0.4245 -0.3281
## temperature -0.4245 1.0000 0.6599
## humidity -0.3281 0.6599 1.0000
# Palette for the four model curves; drop the 4th (low-contrast) color.
# FIX: use `<-` for assignment (file convention) instead of `=`.
col <- wes_palette(n = 5, name = 'FantasticFox1')
col <- col[-4]
# Observed pressure over the test window with each model's forecast overlaid.
# FIX: replace the repeated `par(new = T)` + `plot()` trick (which used the
# discouraged `T` alias and re-drew the axes for every curve) with `lines()`,
# which adds to the existing plot on the same coordinate system.
plot(daily.testing.data$date, daily.testing.data$pressure, type = 'l',
     ylim = c(970, 1020), main = 'Pressure Prediction',
     ylab = 'Pressure', xlab = 'Date')
lines(daily.testing.data$date, gam.prediction$pressure, col = col[1])
lines(daily.testing.data$date, arma.garch.prediction[[1]]$prediction, col = col[2])
lines(daily.testing.data$date, varx.prediction[[1]]$fcst, col = col[3])
lines(daily.testing.data$date, armax.prediction[[1]]$prediction, col = col[4])
legend('bottomright',
       legend = c('observation', 'Nonparametric Trend + ANOVA Seasonality',
                  'ARMA-GARCH', 'VARX', 'ARMAX'),
       col = c('black', col), lty = 1, cex = 0.8)
# Observed temperature over the test window with each model's forecast overlaid.
# FIX: the original copy-pasted the pressure plot's `main = 'Pressure
# Prediction'` and `ylab = 'Pressure'` onto temperature data; labels corrected.
# Also replaced `par(new = T)` + `plot()` overlays with `lines()`.
plot(daily.testing.data$date, daily.testing.data$temperature, type = 'l',
     ylim = c(264, 289), main = 'Temperature Prediction',
     ylab = 'Temperature', xlab = 'Date')
lines(daily.testing.data$date, gam.prediction$temperature, col = col[1])
lines(daily.testing.data$date, arma.garch.prediction[[2]]$prediction, col = col[2])
lines(daily.testing.data$date, varx.prediction[[2]]$fcst, col = col[3])
lines(daily.testing.data$date, armax.prediction[[2]]$prediction, col = col[4])
legend('bottomright',
       legend = c('observation', 'Nonparametric Trend + ANOVA Seasonality',
                  'ARMA-GARCH', 'VARX', 'ARMAX'),
       col = c('black', col), lty = 1, cex = 0.8)
# Observed humidity over the test window with each model's forecast overlaid.
# FIX: the original copy-pasted the pressure plot's `main = 'Pressure
# Prediction'` and `ylab = 'Pressure'` onto humidity data; labels corrected.
# Also replaced `par(new = T)` + `plot()` overlays with `lines()`.
plot(daily.testing.data$date, daily.testing.data$humidity, type = 'l',
     ylim = c(1, 6), main = 'Humidity Prediction',
     ylab = 'Humidity', xlab = 'Date')
lines(daily.testing.data$date, gam.prediction$humidity, col = col[1])
lines(daily.testing.data$date, arma.garch.prediction[[3]]$prediction, col = col[2])
lines(daily.testing.data$date, varx.prediction[[3]]$fcst, col = col[3])
lines(daily.testing.data$date, armax.prediction[[3]]$prediction, col = col[4])
legend('bottomright',
       legend = c('observation', 'Nonparametric Trend + ANOVA Seasonality',
                  'ARMA-GARCH', 'VARX', 'ARMAX'),
       col = c('black', col), lty = 1, cex = 0.8)
# Mean absolute error on the test window, per series and per model.
# Helper: bind one forecast field from a 3-element prediction list
# (pressure, temperature, humidity) into a 42 x 3 matrix.
collect_preds <- function(pred, field) {
  cbind(pred[[1]][[field]], pred[[2]][[field]], pred[[3]][[field]])
}
observed <- daily.testing.data[, 2:4]
gam.mae        <- colMeans(abs(observed - gam.prediction[, 2:4]))
arma.garch.mae <- colMeans(abs(observed - collect_preds(arma.garch.prediction, 'prediction')))
varx.mae       <- colMeans(abs(observed - collect_preds(varx.prediction, 'fcst')))
armax.mae      <- colMeans(abs(observed - collect_preds(armax.prediction, 'prediction')))
# One row per model, one column per series.
mae <- rbind(gam.mae, arma.garch.mae, varx.mae, armax.mae)
rownames(mae) <- c('Nonparametric Trend + ANOVA Seasonality',
                   'ARMA-GARCH', 'VARX', 'ARMAX')
kable(mae, align = 'c', row.names = TRUE)
## |                                         | pressure | temperature | humidity  |
## |:----------------------------------------|:--------:|:-----------:|:---------:|
## | Nonparametric Trend + ANOVA Seasonality | 9.073658 |  3.700714   | 0.9060578 |
## | ARMA-GARCH                              | 9.365320 |  3.781821   | 1.0519441 |
## | VARX                                    | 8.765609 |  3.587084   | 0.8670008 |
## | ARMAX                                   | 9.203988 |  3.225209   | 0.8150322 |
# Precision measure (PM): per-series SSE of each model's forecast divided by
# the total sum of squares of the observations about their column means
# (analogous to 1 - R^2 on the test window; smaller is better).
obs <- as.matrix(daily.testing.data[, 2:4])
# FIX: the original computed `daily.testing.data[,2:4] - colMeans(...)`.
# Data-frame/matrix arithmetic recycles the length-3 mean vector DOWN the rows
# (column-major), not across the columns, so each column was centered by a
# rotating mix of the three means and the SST denominator was wrong.
# sweep() subtracts each column's own mean.
sst <- colSums(sweep(obs, 2, colMeans(obs))^2)
gam.pm <- colSums((as.matrix(gam.prediction[, 2:4]) - obs)^2) / sst
arma.garch.pm <- colSums((cbind(arma.garch.prediction[[1]]$prediction,
                                arma.garch.prediction[[2]]$prediction,
                                arma.garch.prediction[[3]]$prediction) - obs)^2) / sst
varx.pm <- colSums((cbind(varx.prediction[[1]]$fcst,
                          varx.prediction[[2]]$fcst,
                          varx.prediction[[3]]$fcst) - obs)^2) / sst
armax.pm <- colSums((cbind(armax.prediction[[1]]$prediction,
                           armax.prediction[[2]]$prediction,
                           armax.prediction[[3]]$prediction) - obs)^2) / sst
# One row per model, one column per series.
pm <- rbind(gam.pm, arma.garch.pm, varx.pm, armax.pm)
rownames(pm) <- c('Nonparametric Trend + ANOVA Seasonality',
                  'ARMA-GARCH', 'VARX', 'ARMAX')
kable(pm, align = 'c', row.names = TRUE)
## |                                         | pressure  | temperature | humidity |
## |:----------------------------------------|:---------:|:-----------:|:--------:|
## | Nonparametric Trend + ANOVA Seasonality | 0.0002111 |  0.0000954  | 3.2e-06  |
## | ARMA-GARCH                              | 0.0002437 |  0.0001049  | 4.6e-06  |
## | VARX                                    | 0.0002203 |  0.0000934  | 3.2e-06  |
## | ARMAX                                   | 0.0002367 |  0.0000813  | 2.7e-06  |